estimation <- function(data, i, indexes, indexes_columns, event_date, type = 'LASSO', seed, business_cal = TRUE, 
                       estim_start_days = -81L, estim_stop_days = -21L, event_start_days = -20L, event_stop_days = 20L){
  # Estimation window function takes a dataset as input that has, as columns, predictive indexes and,
  # as rows, daily observations relative to the stock prices to be predicted
  options(tidyverse.quiet = TRUE)
  library(tidyverse, quietly = TRUE)
  `%!in%` <- Negate(`%in%`)
  
  if (class(event_date) != 'Date'){
    stop("Please supply a 'Date' object type as event_date argument")
  } 
  
  if (type != 'LASSO' & type != 'OLS') {
    stop("Please choose among 'OLS' and 'LASSO' as possible estimation procedure (type argument)")
  }
  
  if (business_cal){
    library(bizdays) # for computing business days
    calendar <- create.calendar('biz_calendar', weekdays = c('saturday', 'sunday'))
    # keep only calendar days
    data <- data %>% filter(is.bizday(date, calendar))
    indexes <- indexes %>% filter(date %in% unique(data$date))
    
    # Set window lengths:
    # event window
    event_start <- as.Date(offset(event_date, event_start_days, cal = calendar))
    event_stop <- as.Date(offset(event_date, event_stop_days, cal = calendar))
    
    estim_stop <- as.Date(offset(event_date, estim_stop_days, cal = calendar))
    estim_start <- as.Date(offset(event_date, estim_start_days, cal = calendar))
  } else {
    # Set window lengths:
    # event window
    event_start <- event_date + event_start_days
    event_stop <- event_date + event_stop_days
    
    estim_stop <- event_date + estim_stop_days
    estim_start <- event_date + estim_start_days
  }
  
  if (type == 'LASSO'){
    library(glmnet, quietly = TRUE)
    lasso <- indexes %>%
      select(date, all_of(indexes_columns)) %>%
      ungroup() %>%
      filter(date >= estim_start &
               date <= event_stop) %>%
      distinct()
    
    cols <- colnames(lasso)
    out <- map(.x = cols,
               .f = function(x) sum(is.na(lasso[,x]))) %>% 
      unlist()
    # drop indexes with too many missing values
    drop <- which(out > median(out))
    keep <- 1:ncol(lasso) %!in% drop
    lasso <- lasso[,keep]
    
    # left-join indexes
    dat <- data %>%
      filter(ticker == i &
               date >= estim_start &
               date <= event_stop) %>%
      filter(!is.na(chg)) %>%
      left_join(lasso,
                by = 'date')
    
    # remove date from lasso
    lasso <- lasso %>%
      select(-date)
    
    # initialize results dataframes:
    estim_results <- NULL
    event_results <- NULL
    
    # and an error vector:
    ID_errors <- NULL
    
    # create estimation data
    estim_win <- dat %>%
      filter(date >= estim_start &
               date <= estim_stop) %>%
      filter(!is.na(chg))
    
    # trycatch the model
    tryCatch({ 
      set.seed(seed)
      # LASSO market models:
      x <- estim_win %>%
        ungroup() %>%
        select(all_of(colnames(lasso)), chg) %>% 
        na.omit() %>%
        select(-chg) %>% as.matrix()
      y <- estim_win %>%
        ungroup() %>%
        select(all_of(colnames(lasso)), chg) %>% 
        na.omit() %>%
        select(chg) %>% as.matrix()
      
      lasso_cv <- cv.glmnet(x, y, family = "gaussian", nfolds = 5)
      best_lambda <- lasso_cv$lambda.min
      # get coefficients of best model
      best_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
      
      # find R-squared of model on training data
      y_predicted <- predict(best_model, s = best_lambda, newx = x)
      
      sst <- sum((y - mean(y))^2)
      sse <- sum((y_predicted - y)^2)
      r2 <- 1 - sse/sst

      dat$fitted_LASSO <- predict(best_model, 
                                  s = best_lambda, 
                                  newx = dat %>%
                                    ungroup() %>%
                                    select(all_of(colnames(lasso))) %>%
                                    as.matrix()) %>%
        as.numeric()
      dat$abnormal_LASSO <- dat$chg - dat$fitted_LASSO
      dat$abnormal_LASSO <- as.numeric(dat$abnormal_LASSO)
      
      # store results
      results <- data.frame(ticker = i,
                            R2 = r2)
      estim_results <- rbind(estim_results, results)
      
      results <- dat %>% 
        select(ticker, date, chg, fitted_LASSO, abnormal_LASSO) %>%
        arrange(date) %>%
        mutate(event_window = case_when(date >= event_start & date <= event_stop ~ 1,
                                        TRUE ~ 0))
      event_results <- rbind(event_results, results)
    }, 
    error = function(x){
      ID_errors <<- c(ID_errors, i) # parent environment assignment (need to "go out" of function environment)
      return(ID_errors)
    }
    )
  } else {
    ols <- indexes %>%
      select(date, all_of(indexes_columns))
    
    # left-join indexes
    dat <- data %>%
      filter(ticker == i &
               date >= estim_start &
               date <= event_stop) %>%
      filter(!is.na(chg)) %>%
      left_join(ols,
                by = 'date')
    
    # remove date from lasso
    ols <- ols %>%
      select(-date)
    
    # initialize results dataframes:
    estim_results <- NULL
    event_results <- NULL
    
    # and an error vector:
    ID_errors <- NULL
    
    # create estimation data
    estim_win <- dat %>%
      filter(date >= estim_start &
               date <= estim_stop) %>%
      filter(!is.na(chg))
    
    # trycatch the model
    tryCatch({ 
      # OLS market models:
      mod <- lm(chg ~ ., 
                data = estim_win %>%
                  select(chg, all_of(colnames(ols))))
      r2 <- summary(mod)$r.squared

      dat$fitted_ols <- predict(mod, newdata = dat)
      dat$abnormal_ols <- dat$chg - dat$fitted_ols
      
      # store results
      results <- data.frame(ticker = i,
                            R2 = r2)
      estim_results <- rbind(estim_results, results)
      
      results <- dat %>% 
        select(ticker, date, abnormal_ols, fitted_ols, chg) %>%
        arrange(date) %>%
        mutate(event_window = case_when(date >= event_start & date <= event_stop ~ 1,
                                        TRUE ~ 0))
      event_results <- rbind(event_results, results)
    }, 
    error = function(x){
      ID_errors <<- c(ID_errors, i) # parent environment assignment (need to "go out" of function environment)
      return(ID_errors)
    })
  } 
  
  out <- list(estim_results, event_results, ID_errors)
  return(out)
}